Lib: D13_D33_D45_D47_CD4_Proliferated_L2
The hastag antibodies used were:
# lib name
hto_used = samplesheet %>%
filter(Library_name == lib_name) %>%
pull(Antibody) %>%
unique
hto_used
## [1] "TotalSeq_C_1" "TotalSeq_C_5" "TotalSeq_C_6" "TotalSeq_C_7"
## [5] "TotalSeq_C_8" "TotalSeq_C_9" "TotalSeq_C_10" "TotalSeq_C_11"
## [9] "TotalSeq_C_12" "TotalSeq_C_2" "TotalSeq_C_3" "TotalSeq_C_4"
Number of features (genes) and samples (cells) in this library
# Load the PBMC dataset
data <- Read10X(data.dir = paste0("../Data/",lib_name,"/filtered_feature_bc_matrix/"))
# for CellBender, do on cmd:
# ptrepack --complevel 5 output.h5:/matrix output_filtered_seurat.h5:/matrix
# output from Cellbender:
#data <- Read10X(data.dir = paste0("../Data/",lib_name,"/sample_filtered_feature_bc_matrix/"))
#data <- Read10X_h5(filename = "../Data/D33_D45_D47_CD4_Tcells/h5/output_filtered_seurat.h5")
gex <- data[[1]]
hto <- data[["Antibody Capture"]][rownames(data[["Antibody Capture"]]) %in% hto_used, ]
# remove SCoV2 genes
# plasmo.gex <- gex[grep(rownames(gex), pattern = "^gene-PF3D7"),] # assay with Plasmodium genes
# human.gex <- gex[grep(rownames(gex), pattern = "^gene-PF3D7", invert = T),]
# Initialize the Seurat object with the raw (non-normalized data).
#mono <- CreateSeuratObject(counts = gex, project = lib_name, min.cells = 3, min.features = 200)
options(Seurat.object.assay.calcn = TRUE)
hustle <- CreateSeuratObject(counts = gex, project = lib_name, min.cells = 3, min.features = 10)
hustle
## An object of class Seurat
## 18492 features across 12604 samples within 1 assay
## Active assay: RNA (18492 features, 0 variable features)
## 1 layer present: counts
hustle[["percent.mt"]] <- PercentageFeatureSet(hustle, pattern = "^MT-")
# ribo genes
ribo.genes <- grep("^RP[SL][[:digit:]]|^RPLP[[:digit:]]|^RPSA",rownames(hustle),value = TRUE)
ribo.genes <- grep("[BK-]|AP|[[:digit:]](.+)L",ribo.genes,value = TRUE, invert = T)
# percent.ribo <- colSums(hustle[ribo.genes, ])/colSums(hustle)
# hustle <- AddMetaData(object = hustle, metadata = percent.ribo, col.name = "percent.ribo")
hustle[["percent.ribo"]] <- PercentageFeatureSet(object = hustle, features = ribo.genes)
head(hustle@meta.data, 5)
Demultiplex: Assign donor to cells - singlets, doublets, negatives using vireo
# install / upgrade vireoSNP using pip install --upgrade --no-deps vireoSNP
# vireo to check installation errors
# download all cellSNP file from library -> save vcf file as .gz, leave others un-gzipped
# error: index out of range -> solved by downloading the files again and redoing vireo
# command line: vireo -c cellSNP_mat/ -N 3 -o vireo_result/ for 3 donors
# add vireo output to meta data
hustle.demul <- hustle
snp <- read.delim(paste("../Data/",lib_name,"/vireo_result/donor_ids.tsv", sep = ""))
meta <- hustle.demul@meta.data %>%
tibble::rownames_to_column("cell") %>%
left_join(snp)
table(meta$donor_id)
##
## donor0 donor1 donor2 donor3 doublet unassigned
## 9513 1277 665 135 654 360
# remove doublets and unassigned
hustle.demul@meta.data <- meta
hustle.demul@meta.data <- hustle.demul@meta.data %>%
as.data.frame() %>%
tibble::column_to_rownames("cell")
## didn't need the next line for CellBender output:
Idents(hustle.demul) <- "donor_id"
hustle.demul <- subset(x = hustle.demul, idents = c("doublet", "unassigned"), invert = T)
hustle <- hustle.demul
# identical(meta$cell, rownames(mono@meta.data))
# [1] TRUE
#Cell Hashing uses oligo-tagged antibodies against ubuquitously expressed surface proteins to place a “sample barcode” on each single cell, enabling different samples to be multiplexed together and run in a single experiment.
# Select cell barcodes detected by both RNA and HTO In the example datasets we have already
# filtered the cells for you, but perform this step for clarity.
hustle.singlet.donor <- hustle
joint.bcs <- intersect(rownames(hustle.singlet.donor@meta.data), colnames(hto))
# identical(colnames(pbmc@assays$RNA), rownames(pbmc@meta.data))
# [1] TRUE
# Subset RNA and HTO counts by joint cell barcodes
#pbmc@assays$RNA <- pbmc@assays$RNA[, joint.bcs]
hustle.hto <- as.matrix(hto[,joint.bcs])
## # Add HTO data as a new assay independent from RNA
Idents(hustle.singlet.donor) <- rownames(hustle.singlet.donor@meta.data)
hustle.singlet.donor[["HTO"]] <- CreateAssayObject(counts = hustle.hto)
# Normalize HTO data, here we use centered log-ratio (CLR) transformation
hustle.singlet.donor <- NormalizeData(hustle.singlet.donor, assay = "HTO", normalization.method = "CLR")
# If you have a very large dataset we suggest using k_function = 'clara'. This is a k-medoid
# clustering function for large applications You can also play with additional parameters (see
# documentation for HTODemux()) to adjust the threshold for classification Here we are using
# the default settings
hustle.singlet.donor <- HTODemux(hustle.singlet.donor, assay = "HTO", positive.quantile = 0.99) #, positive.quantile = 0.99
#hustle.singlet.donor <- MULTIseqDemux(hustle.singlet.donor, assay = "HTO")
# Global classification results
table(hustle.singlet.donor$HTO_classification.global)
##
## Doublet Negative Singlet
## 4152 59 7379
#table(hustle.singlet.donor$MULTI_classification)
Idents(hustle.singlet.donor) <- "HTO_classification.global"
#Idents(hustle.singlet.donor) <- "MULTI_classification"
VlnPlot(hustle.singlet.donor, features = c("nFeature_RNA", "nCount_RNA"), pt.size = 0.1, log = F)
#VlnPlot(hustle.singlet.donor, features = c("percent.mt", "percent.plasmo"), pt.size = 0.1, log = TRUE)
hustle.h <- hustle.singlet.donor
# First, we will remove negative cells from the object
# if(any(hustle.h$MULTI_classification == "Negative") == T)
# hustle.h <- subset(hustle.h, idents = c("Negative"), invert = TRUE)
if(any(hustle.h$HTO_classification.global == "Negative") == T)
hustle.h <- subset(hustle.h, idents = c("Negative"), invert = TRUE)
# Calculate a tSNE embedding of the HTO data
DefaultAssay(hustle.h) <- "HTO"
hustle.h <- ScaleData(hustle.h, features = rownames(hustle.h),
verbose = FALSE)
hustle.h <- RunPCA(hustle.h, features = rownames(hustle.h), approx = FALSE)
hustle.h <- RunTSNE(hustle.h, check_duplicates = F)
Idents(hustle.h) <- 'HTO_classification'
#Idents(hustle.h) <- 'MULTI_classification'
#DimPlot(hustle.h)
# To increase the efficiency of plotting, you can subsample cells using the num.cells argument
#HTOHeatmap(hustle, assay = "HTO", ncells = 500)
Features and samples after assigning cells to donors and retaining only singlets
Idents(hustle.singlet.donor) = "HTO_classification"
# Extract singlets
#hashtags <- gsub("Total_Seq", "TotalSeq", hto_used)
hashtags <- gsub("_", "-", hto_used)
# hustle.singlet.donor@meta.data <- hustle.singlet.donor@meta.data %>%
# mutate(Cell_state = case_when(
# as.character(MULTI_classification) %in% hashtags ~ "Singlet",
# .default = MULTI_classification
# ))
#Idents(hustle.singlet.donor) <- "Cell_state"
#hustle.singlet <- subset(hustle.singlet.donor, subset = Cell_state == "Singlet")
# For HTODemux:
hustle.singlet.donor = hustle.h
Idents(hustle.singlet.donor) = "HTO_classification.global"
hustle.singlet <- subset(hustle.singlet.donor, idents = "Singlet")
hustle.singlet
## An object of class Seurat
## 18504 features across 7379 samples within 2 assays
## Active assay: HTO (12 features, 0 variable features)
## 3 layers present: counts, data, scale.data
## 1 other assay present: RNA
## 2 dimensional reductions calculated: pca, tsne
# donorid <- samplesheet %>%
# filter(Library_name == lib_name) %>%
# dplyr::rename(HTO_classification = Antibody) %>%
# #mutate(HTO_classification = gsub("_", "-", HTO_classification)) %>%
# dplyr::rename(Group = `Patient Group`) %>%
# select(Group, HTO_classification) %>%
# distinct()
# donorid <- samplesheet %>%
# filter(Library_name == lib_name) %>%
# dplyr::rename(MULTI_classification = Antibody) %>%
# mutate(MULTI_classification = gsub("_", "-", MULTI_classification)) %>%
# dplyr::rename(Group = `Patient Group`) %>%
# select(Group, MULTI_classification) %>%
# distinct()
hustle.demul <- hustle.singlet
# hustle.demul@meta.data <- hustle.demul@meta.data %>%
# left_join(., donorid)
rownames(hustle.demul@meta.data) <- rownames(hustle.singlet@meta.data)
#rownames(hustle.singlet@meta.data)
hustle <- hustle.singlet
QC for number of RNA molecules (nCount_RNA), number of features (nFeature_RNA), mitochondrial RNA percent denoting dead cells (percent.mt) and ribosomal genes percentage (NA cells because of unmatched CellBender cell filtration)
# Visualize QC metrics as a violin plot
DefaultAssay(hustle) <- "RNA"
Idents(hustle) <- "orig.ident"
VlnPlot(hustle, features=c("nCount_RNA", "nFeature_RNA", "percent.mt"), ncol = 3)
VlnPlot(hustle, features=c("percent.ribo"))
ggplot(hustle@meta.data, aes(x = nFeature_RNA)) + geom_histogram(binwidth = 50)
#ggplot(mono@meta.data, aes(x = nFeature_RNA)) + geom_histogram(binwidth = 20) + xlim(c(0, 600))
ggplot(hustle@meta.data, aes(x = nFeature_RNA)) +
geom_histogram(binwidth = 50) +
facet_wrap( ~ donor_id)
ggplot(hustle@meta.data, aes(x = nCount_RNA)) + geom_histogram(binwidth = 50)
ggplot(hustle@meta.data, aes(x = nCount_RNA)) +
geom_histogram(binwidth = 50) +
facet_wrap( ~ donor_id)
#ggplot(mono@meta.data, aes(x = nCount_RNA)) + geom_histogram(binwidth = 50) + xlim(c(0, 5000))
ggplot(hustle@meta.data, aes(x = percent.mt)) + geom_histogram(binwidth = 1)
ggplot(hustle@meta.data, aes(x = percent.mt)) +
geom_histogram(binwidth = 1) +
facet_wrap( ~ donor_id)
ggplot(hustle@meta.data, aes(x = percent.ribo)) + geom_histogram(binwidth = 1)
ggplot(hustle@meta.data, aes(x = percent.ribo)) +
geom_histogram(binwidth = 1) +
facet_wrap( ~ donor_id)
#ggplot(mono@meta.data, aes(x = percent.Ribosomal)) + geom_histogram(binwidth = 1)
#ggplot(mono@meta.data, aes(x = percent.Largest.Gene)) + geom_histogram(binwidth = 1)
deadcell_nF_lowercutoff <- 250
deadcell_nF_uppercutoff <- 3500
deadcell_nC_lowercutoff <- 1250
#weirdcell_ribo_uppercutoff <- 40
deadcell_mt_cutoff <- 12
hustle.meta <- hustle@meta.data %>%
mutate(is.dead = case_when(
nFeature_RNA >= deadcell_nF_lowercutoff &
nFeature_RNA <= deadcell_nF_uppercutoff &
#percent.ribo <= weirdcell_ribo_uppercutoff &
nCount_RNA >= deadcell_nC_lowercutoff &
percent.mt <= deadcell_mt_cutoff ~ "FALSE",
TRUE ~ "TRUE"))
table(hustle.meta$is.dead)
##
## FALSE TRUE
## 6379 1000
ggplot(hustle.meta, aes(x = nFeature_RNA, fill = factor(is.dead))) +
geom_histogram(bins = 50, position = "identity", alpha = 0.5)
ggplot(hustle.meta, aes(x = nCount_RNA, fill = factor(is.dead))) +
geom_histogram(bins = 50, position = "identity", alpha = 0.5)
ggplot(hustle.meta, aes(x = percent.mt, fill = factor(is.dead))) +
geom_histogram(bins = 50, position = "identity", alpha = 0.5)
ggplot(hustle.meta, aes(x = percent.ribo, fill = factor(is.dead))) +
geom_histogram(bins = 50, position = "identity", alpha = 0.5)
hustle1 <- subset(hustle, subset = nFeature_RNA >= deadcell_nF_lowercutoff &
nFeature_RNA <= deadcell_nF_uppercutoff &
percent.mt <= deadcell_mt_cutoff &
nCount_RNA >= deadcell_nC_lowercutoff
#percent.ribo <= weirdcell_ribo_uppercutoff
) #nCount_RNA >= 1000 &
hustle1
## An object of class Seurat
## 18504 features across 6379 samples within 2 assays
## Active assay: RNA (18492 features, 0 variable features)
## 1 layer present: counts
## 1 other assay present: HTO
## 2 dimensional reductions calculated: pca, tsne
VlnPlot(hustle1, features=c("nCount_RNA", "nFeature_RNA", "percent.mt", "percent.ribo"), group.by = "orig.ident", ncol = 2)
hustle = hustle1
Assigning hashtags and peptide pools to cells
# assigning cell to donor and pool
hustle@meta.data <- hustle@meta.data %>%
mutate(Donor_expt = case_when(
HTO_classification == "TotalSeq-C-1" ~ "D13",
HTO_classification == "TotalSeq-C-2" ~ "D33",
HTO_classification == "TotalSeq-C-3" ~ "D45",
HTO_classification == "TotalSeq-C-4" ~ "D47",
.default = ""
)) %>%
mutate(pep.pool = case_when(
HTO_classification == "TotalSeq-C-5" ~ "P2",
HTO_classification == "TotalSeq-C-6" ~ "P3",
HTO_classification == "TotalSeq-C-7" ~ "P4",
HTO_classification == "TotalSeq-C-8" ~ "P5",
HTO_classification == "TotalSeq-C-9" ~ "P6",
HTO_classification == "TotalSeq-C-10" ~ "P7",
HTO_classification == "TotalSeq-C-11" ~ "P8",
HTO_classification == "TotalSeq-C-12" ~ "P9",
HTO_classification == "TotalSeq-C-2" ~ "P1",
HTO_classification == "TotalSeq-C-3" ~ "P1",
HTO_classification == "TotalSeq-C-4" ~ "P1",
HTO_classification == "TotalSeq-C-1" ~ "P1",
.default = ""
))
# get donor id from SNP for assigning D33, D45, D47
donorid_snp_hto <- hustle@meta.data %>%
dplyr::select(Donor_expt, donor_id) %>%
filter(Donor_expt != "") %>%
filter(!donor_id %in% c("doublet", "unassigned")) %>%
group_by(donor_id) %>%
count(Donor_expt, donor_id)
#Put them back in hustle meta data
hustle@meta.data <- hustle@meta.data %>%
mutate(Donor_expt = case_when(
HTO_classification == "TotalSeq-C-1" ~ "D13",
HTO_classification == "TotalSeq-C-2" ~ "D33",
HTO_classification == "TotalSeq-C-3" ~ "D45",
HTO_classification == "TotalSeq-C-4" ~ "D47",
donor_id == "donor0" ~ "D47",
donor_id == "donor1" ~ "D33",
donor_id == "donor2" ~ "D13",
donor_id == "donor3" ~ "D45",
.default = ""
)) %>%
filter(!is.na(donor_id)) %>%
mutate(donor_pep.pool = paste(Donor_expt, pep.pool, sep = "_"))
# # remove left over unassigned and doublet "cells"
# Idents(hustle) <- c("donor_id")
# hustle <- subset(hustle, idents = c("doublet", "unassigned"), invert = T)
table(hustle$Donor_expt)
##
## D13 D33 D45 D47
## 335 637 56 5351
table(hustle$pep.pool)
##
## P1 P2 P3 P4 P5 P6 P7 P8 P9
## 1539 600 940 665 191 808 1002 105 529
table(hustle$Donor_expt, hustle$pep.pool)
##
## P1 P2 P3 P4 P5 P6 P7 P8 P9
## D13 124 24 40 40 6 7 23 12 59
## D33 224 22 21 149 69 32 39 54 27
## D45 13 2 4 3 11 8 6 0 9
## D47 1178 552 875 473 105 761 934 39 434
table(hustle$donor_pep.pool)
##
## D13_P1 D13_P2 D13_P3 D13_P4 D13_P5 D13_P6 D13_P7 D13_P8 D13_P9 D33_P1 D33_P2
## 124 24 40 40 6 7 23 12 59 224 22
## D33_P3 D33_P4 D33_P5 D33_P6 D33_P7 D33_P8 D33_P9 D45_P1 D45_P2 D45_P3 D45_P4
## 21 149 69 32 39 54 27 13 2 4 3
## D45_P5 D45_P6 D45_P7 D45_P9 D47_P1 D47_P2 D47_P3 D47_P4 D47_P5 D47_P6 D47_P7
## 11 8 6 9 1178 552 875 473 105 761 934
## D47_P8 D47_P9
## 39 434
Cells with assigned clonotypes
add_clonotype <- function(seurat_obj){
tcr <- read.csv(paste0("../Data/", lib_name, "/filtered_contig_annotations.csv"))
# Clonotype-centric info.
clono <- read.csv(paste0("../Data/", lib_name, "/clonotypes.csv"))
# Subsets so only the first line of each barcode is kept,
# as each entry for given barcode will have same clonotype.
tcr <- tcr %>%
distinct(barcode, .keep_all = T) %>%
# Only keep the barcode and clonotype columns.
# We'll get additional clonotype info from the clonotype table.
dplyr::select(barcode, raw_clonotype_id) %>%
rename(clonotype_id = raw_clonotype_id) %>%
# Slap the AA sequences onto our original table by clonotype_id.
merge(., clono[, c("clonotype_id", "cdr3s_aa")]) %>%
tibble::column_to_rownames("barcode")
seurat_obj <- seurat_obj[,rownames(tcr)]
# Add to the Seurat object's metadata.
clono_seurat <- AddMetaData(object=seurat_obj, metadata=tcr)
return(clono_seurat)
}
Idents(hustle) <- rownames(hustle@meta.data)
hustle <- add_clonotype(hustle)
table(!is.na(hustle$clonotype_id))
##
## TRUE
## 5523
# FALSE TRUE
# 515 834
# keep only the clonotypes
hustle <- subset(hustle, cells = colnames(hustle)[!is.na(hustle$clonotype_id)])
hustle
## An object of class Seurat
## 18504 features across 5523 samples within 2 assays
## Active assay: RNA (18492 features, 0 variable features)
## 1 layer present: counts
## 1 other assay present: HTO
## 2 dimensional reductions calculated: pca, tsne
Idents(hustle) <- "HTO_maxID"
RidgePlot(hustle, assay = "HTO", features = rownames(hustle@assays[["HTO"]])[1:6])
RidgePlot(hustle, assay = "HTO", features = rownames(hustle@assays[["HTO"]])[7:12])
# Variable genes, cell cycle regression; ribosomal expression regression; principal components from elbow plot
# hustle <- NormalizeData(hustle, normalization.method = "LogNormalize", scale.factor = 10000)
# s.genes <- cc.genes$s.genes
# g2m.genes <- cc.genes$g2m.genes
# hustle <- CellCycleScoring(hustle, g2m.features = g2m.genes, s.features = s.genes)
# hustle <- FindVariableFeatures(hustle, selection.method = "vst", nfeatures = 2000)
# all.genes <- rownames(hustle)
# hustle <- ScaleData(hustle) #, features = all.genes
# hustle <- RunPCA(hustle)
# ElbowPlot(hustle)
# VizDimLoadings(hustle, dims = 1:2, reduction = "pca")
# DimHeatmap(hustle, dims = 1:5, cells = 500, balanced = TRUE)
# hustle <- FindNeighbors(hustle, dims = 1:12)
# hustle <- FindClusters(hustle, resolution = 0.8)
# hustle <- RunUMAP(hustle, dims = 1:12, reduction = "pca")
# Identify the 15 most highly variable genes
# ranked_variable_genes <- VariableFeatures(hustle)
# top_genes <- ranked_variable_genes[1:15]
### ribo module score
# hustle <- AddModuleScore(
# object = hustle,
# features = ribo.genes,
# name = 'RiboScore'
# )
# FeaturePlot(hustle, features = "RiboScore1")
# Plot the average expression and variance of these genes
# With labels to indicate which genes are in the top 15
# p <- VariableFeaturePlot(hustle)
# LabelPoints(plot = p, points = top_genes, repel = TRUE)
# hustle <- RunPCA(hustle, features = VariableFeatures(object = hustle))
# DimPlot(hustle,
# reduction = "pca",
# group.by= "Phase")
# hustle <- ScaleData(hustle, vars.to.regress = c("S.Score", "G2M.Score"), features = rownames(hustle))
# hustle <- RunPCA(hustle, features = VariableFeatures(object = hustle), npcs = 15, nfeatures.print = 10)
# DimPlot(hustle, reduction = "pca", group.by = "Phase")
# hustle <- RunPCA(hustle, features = c(s.genes, g2m.genes), npcs = 15)
# DimPlot(hustle, reduction = "pca", group.by = "Phase")
# hustle <- ScaleData(hustle, vars.to.regress = c("percent.ribo"), features = rownames(hustle))
# hustle <- RunPCA(hustle, features = c(ribo.genes), npcs = 15)
# Idents(hustle) <- "RiboScore1"
# DimPlot(hustle, reduction = "pca") + NoLegend()
#hustle <- SCTransform(hustle, vars.to.regress = c("S.Score", "G2M.Score", "percent.ribo"))
#hustle <- SCTransform(hustle, vars.to.regress = c("S.Score", "G2M.Score"))
# hustle <- JackStraw(hustle, num.replicate = 70)
# hustle <- ScoreJackStraw(hustle, dims = 1:10)
#JackStrawPlot(hustle, dims = 1:10)
use.pcs = 1:17
8482 -> saved.seed
hustle <- NormalizeData(hustle, normalization.method = "LogNormalize", scale.factor = 10000)
hustle <- FindVariableFeatures(hustle, selection.method = "vst", nfeatures = 2000)
all.genes <- rownames(hustle)
hustle <- ScaleData(hustle) #, features = all.genes
hustle <- RunPCA(hustle)
hustle <- RunTSNE(hustle, dims=use.pcs, seed.use = saved.seed, perplexity=100)
ElbowPlot(hustle)
VizDimLoadings(hustle, dims = 1:2, reduction = "pca")
DimHeatmap(hustle, dims = 1:5, cells = 500, balanced = TRUE)
hustle <- FindNeighbors(hustle, dims = use.pcs)
hustle <- RunUMAP(hustle, dims = use.pcs, reduction = "pca")
hustle <- FindClusters(hustle, resolution = 1) #, graph.name = "RNA_snn_res.0.9"
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 5523
## Number of edges: 183275
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7541
## Number of communities: 12
## Elapsed time: 0 seconds
DimPlot(hustle, reduction = "umap")
#png("HUSTLEseq_cellcycle_ambientRNA_PCs.png", width = 9, height = 5)
#DimHeatmap(hustle, dims = 1:5, nfeatures = 30)
#dev.off()
FeaturePlot(hustle, feature = "MKI67", reduction = "umap")
Gene markers for clusters
markers_all = FindAllMarkers(hustle,genes.use = VariableFeatures(hustle),
only.pos = F,
min.pct = 0.25,
thresh.use = 0.25)
DT::datatable(markers_all)
Gene markers unique to each cluster
markers_all_single <- markers_all[markers_all$gene %in% names(table(markers_all$gene))[table(markers_all$gene) == 1],]
DT::datatable(markers_all_single)
Top N marker genes
top15_markers <- markers_all %>%
group_by(cluster) %>%
slice_max(n = 20, order_by = abs(avg_log2FC))
DT::datatable(top15_markers)
top10 <- markers_all %>% group_by(cluster) %>% top_n(10, avg_log2FC)
#FeaturePlot(hustle, features = top10$gene[6:10])
Visualise cells with assigned clonotypes, predicted celltypes and cell clusters
hustle <- RunAzimuth(hustle, reference = "pbmcref")
DimPlot(hustle, reduction = "umap", group.by = "clonotype_id", label = F) + NoLegend()
DimPlot(hustle, reduction = "umap", group.by = "predicted.celltype.l2", label = T)
#DimPlot(hustle, reduction = "umap", label = TRUE)
#ggsave("HUSTLEseq_cellcycle_ambientRNA_clusters.png", width = 9, height = 5)
Heatmap with top 10 markers
DoHeatmap(
object = hustle,
features = top10$gene
) +
theme(text = element_text(size = 5))
#ggsave("HUSTLEseq_cellcycle_ambientRNA_markergenes_heatmap.png", width = 12, height = 9)
# write.table(top15_markers, "hustleseq_top15_markers_6clusters.txt", row.names = F, quote = F)
# write.table(markers_all_single, "hustleseq_unique_markers_6clusters.txt", row.names = F, quote = F)
Cells mapped to specific epitopes
# hustle.meta <- data.frame(hustle@reductions[["umap"]]@cell.embeddings) %>%
# tibble::rownames_to_column("Cell") %>%
# mutate(clonotype_id = substr(hustle@meta.data$clonotype_id, 9, nchar(hustle@meta.data$clonotype_id))) %>%
# mutate(clone_colour = case_when(
# clonotype_id == "clonotype1" ~ "cornflowerblue",
# clonotype_id == "clonotype2" ~ "cornflowerblue",
# clonotype_id == "clonotype4" ~ "salmon",
# clonotype_id == "clonotype6" ~ "lightgreen",
# clonotype_id == "clonotype9" ~ "navyblue",
# clonotype_id == "clonotype24" ~ "darkred",
# clonotype_id == "clonotype43" ~ "yellow4",
# .default = "gray80"
# )) %>%
# mutate(peptide.pool = hustle$pep.pool) %>%
# mutate(pep.pool.colour = case_when(
# peptide.pool == "P1" & clone_colour != "gray80" ~ "#440154",
# peptide.pool == "P2" & clone_colour != "gray80" ~ "#472d7b",
# peptide.pool == "P3" & clone_colour != "gray80" ~ "#3b528b",
# peptide.pool == "P4" & clone_colour != "gray80" ~ "#2c728e",
# peptide.pool == "P5" & clone_colour != "gray80" ~ "#21918c",
# peptide.pool == "P6" & clone_colour != "gray80" ~ "#28ae80",
# peptide.pool == "P7" & clone_colour != "gray80" ~ "#5ec962",
# peptide.pool == "P8" & clone_colour != "gray80" ~ "#addc30",
# peptide.pool == "P9" & clone_colour != "gray80" ~ "#fde725",
# .default = "gray80"
# ))
#
# ggplot(hustle.meta, aes(umap_1, umap_2, label = clonotype_id,), colour = clonotype_id) +
# geom_point(size = 2, alpha = 0.8, colour = hustle.meta$clone_colour) +
# theme_bw() +
# theme(legend.position = "right")
# #ggsave("HUSTLEseq_cellcycle_ambientRNA_clonotypesOfInterest.png", width = 9, height = 7)
#
# hustle.meta1 <- data.frame(hustle@reductions[["umap"]]@cell.embeddings) %>%
# tibble::rownames_to_column("Cell") %>%
# mutate(clonotype_id = substr(hustle@meta.data$clonotype_id, 9, nchar(hustle@meta.data$clonotype_id))) %>%
# mutate(clone_colour1 = case_when(
# clonotype_id == "clonotype1" ~ "cornflowerblue",
# .default = "gray80"
# )) %>%
# mutate(peptide.pool = hustle$pep.pool) %>%
# mutate(pep.pool.colour1 = case_when(
# peptide.pool == "P1" & clone_colour1 != "gray80" ~ "#440154",
# .default = "gray80"
# ))
#
# ggplot(hustle.meta1, aes(umap_1, umap_2, label = clonotype_id,), colour = clonotype_id) +
# geom_point(size = 2, alpha = 0.8, colour = hustle.meta1$clone_colour1) +
# theme_bw() +
# theme(legend.position = "right")
# #ggsave("HUSTLEseq_cellcycle_ambientRNA_clonotype1.png", width = 9, height = 7)
Peptide pool didn’t determine phenotype
# ggplot(hustle.meta, aes(umap_1, umap_2, label = peptide.pool, fill = peptide.pool, colour = peptide.pool)) +
# geom_point(size = 3, alpha = 0.8) +
# theme_bw()